Importing the necessary libraries¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
In [2]:
# igonre the warning

import os 
import warnings
warnings.filterwarnings('ignore')

Read the data from csv¶

In [3]:
Sources_df =pd.read_csv(r"D:\2-Project\9-Waterproject\Source.csv")
Sources_df
Out[3]:
SourceID Source Name Industry Sector
0 1 Residential -
1 2 Industrial Manufacturing
2 3 Healthcare Medical
3 4 Other Miscellaneous
In [4]:
Trtplant_df= pd.read_csv(r"D:\2-Project\9-Waterproject\Treatment Plant.csv")
Trtplant_df
Out[4]:
PlantID name Location Capacity
0 1 San Francisco Treatment Plant San Francisco, CA 100 million gallons per day
1 2 Oakland Treatment Plant Oakland, CA 50 million gallons per day
2 3 Sacramento Treatment Plant Sacramento, CA 25 million gallons per day
3 4 Los Angeles Treatment Plant Los Angeles, CA 150 million gallons per day
4 5 San Diego Treatment Plant San Diego, CA 100 million gallons per day
5 6 Phoenix Treatment Plant Phoenix, AZ 75 million gallons per day
6 7 Denver Treatment Plant Denver, CO 50 million gallons per day
7 8 Minneapolis Treatment Plant Minneapolis, MN 25 million gallons per day
8 9 Chicago Treatment Plant Chicago, IL 150 million gallons per day
9 10 New York Treatment Plant New York, NY 100 million gallons per day
In [5]:
WstWtrtrt_df = pd.read_csv(r"D:\2-Project\9-Waterproject\WasteWaterTreatmentFact.csv")
WstWtrtrt_df
Out[5]:
SampleID PlantID SourceID Date Volume of Water Treated Result
0 1 1 1 1/3/2022 33 Pass
1 2 1 2 1/3/2022 27 Fail
2 3 1 3 1/3/2022 28 Pass
3 4 2 1 1/3/2022 16 Pass
4 5 2 2 1/3/2022 16 Pass
... ... ... ... ... ... ...
1045 1046 9 2 2/6/2022 44 Pass
1046 1047 9 3 2/6/2022 42 Pass
1047 1048 10 1 2/6/2022 29 Pass
1048 1049 10 2 2/6/2022 29 Pass
1049 1050 10 3 2/6/2022 29 Fail

1050 rows × 6 columns

In [6]:
Sources_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   SourceID         4 non-null      int64 
 1   Source Name      4 non-null      object
 2   Industry Sector  4 non-null      object
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes
In [7]:
Trtplant_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   PlantID   10 non-null     int64 
 1   name      10 non-null     object
 2   Location  10 non-null     object
 3   Capacity  10 non-null     object
dtypes: int64(1), object(3)
memory usage: 452.0+ bytes
In [8]:
WstWtrtrt_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050 entries, 0 to 1049
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   SampleID                 1050 non-null   int64 
 1   PlantID                  1050 non-null   int64 
 2   SourceID                 1050 non-null   int64 
 3   Date                     1050 non-null   object
 4   Volume of Water Treated  1050 non-null   int64 
 5   Result                   1050 non-null   object
dtypes: int64(4), object(2)
memory usage: 49.3+ KB

Data cleaning and Transfarmation Activities¶

In [9]:
# checking the null values and duplicate row
In [10]:
Sources_df.isnull().sum()
Out[10]:
SourceID           0
Source Name        0
Industry Sector    0
dtype: int64
In [11]:
Trtplant_df.isnull().sum()
Out[11]:
PlantID     0
name        0
Location    0
Capacity    0
dtype: int64
In [12]:
WstWtrtrt_df.isnull().sum()
Out[12]:
SampleID                   0
PlantID                    0
SourceID                   0
Date                       0
Volume of Water Treated    0
Result                     0
dtype: int64
In [13]:
# check duplicates values
In [14]:
Sources_df.duplicated().sum()
Out[14]:
0
In [15]:
Trtplant_df.duplicated().sum()
Out[15]:
0
In [16]:
WstWtrtrt_df.duplicated().sum()
Out[16]:
0
In [17]:
# change the data types
In [18]:
Sources_df.dtypes
Out[18]:
SourceID            int64
Source Name        object
Industry Sector    object
dtype: object
In [19]:
Trtplant_df.dtypes
Out[19]:
PlantID      int64
name        object
Location    object
Capacity    object
dtype: object
In [20]:
WstWtrtrt_df.dtypes
Out[20]:
SampleID                    int64
PlantID                     int64
SourceID                    int64
Date                       object
Volume of Water Treated     int64
Result                     object
dtype: object
In [21]:
WstWtrtrt_df['Date'] = pd.to_datetime(WstWtrtrt_df['Date'], format='mixed', dayfirst=True)
In [22]:
WstWtrtrt_df.dtypes
Out[22]:
SampleID                            int64
PlantID                             int64
SourceID                            int64
Date                       datetime64[ns]
Volume of Water Treated             int64
Result                             object
dtype: object
In [23]:
# Deriving new column day name

WstWtrtrt_df['Day Name']=WstWtrtrt_df['Date'].dt.day_name()
In [24]:
WstWtrtrt_df
Out[24]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name
0 1 1 1 2022-03-01 33 Pass Tuesday
1 2 1 2 2022-03-01 27 Fail Tuesday
2 3 1 3 2022-03-01 28 Pass Tuesday
3 4 2 1 2022-03-01 16 Pass Tuesday
4 5 2 2 2022-03-01 16 Pass Tuesday
... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday
1046 1047 9 3 2022-06-02 42 Pass Thursday
1047 1048 10 1 2022-06-02 29 Pass Thursday
1048 1049 10 2 2022-06-02 29 Pass Thursday
1049 1050 10 3 2022-06-02 29 Fail Thursday

1050 rows × 7 columns

In [25]:
WstWtrtrt_df['Day Name']
Out[25]:
0        Tuesday
1        Tuesday
2        Tuesday
3        Tuesday
4        Tuesday
          ...   
1045    Thursday
1046    Thursday
1047    Thursday
1048    Thursday
1049    Thursday
Name: Day Name, Length: 1050, dtype: object
In [26]:
WstWtrtrt_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050 entries, 0 to 1049
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   SampleID                 1050 non-null   int64         
 1   PlantID                  1050 non-null   int64         
 2   SourceID                 1050 non-null   int64         
 3   Date                     1050 non-null   datetime64[ns]
 4   Volume of Water Treated  1050 non-null   int64         
 5   Result                   1050 non-null   object        
 6   Day Name                 1050 non-null   object        
dtypes: datetime64[ns](1), int64(4), object(2)
memory usage: 57.6+ KB
In [27]:
# clean capacity column data
Trtplant_df['Capacity']
Out[27]:
0    100 million gallons per day
1     50 million gallons per day
2     25 million gallons per day
3    150 million gallons per day
4    100 million gallons per day
5     75 million gallons per day
6     50 million gallons per day
7     25 million gallons per day
8    150 million gallons per day
9    100 million gallons per day
Name: Capacity, dtype: object
In [28]:
Trtplant_df['Capacity'].str.split(' ').str.get(0).astype('int')
Out[28]:
0    100
1     50
2     25
3    150
4    100
5     75
6     50
7     25
8    150
9    100
Name: Capacity, dtype: int32
In [29]:
Trtplant_df['Capacity_new'] = Trtplant_df['Capacity'].str.split(' ').str.get(0).astype('int')
In [30]:
Trtplant_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   PlantID       10 non-null     int64 
 1   name          10 non-null     object
 2   Location      10 non-null     object
 3   Capacity      10 non-null     object
 4   Capacity_new  10 non-null     int32 
dtypes: int32(1), int64(1), object(3)
memory usage: 492.0+ bytes
In [31]:
Trtplant_df['Capacity_new']
Out[31]:
0    100
1     50
2     25
3    150
4    100
5     75
6     50
7     25
8    150
9    100
Name: Capacity_new, dtype: int32

Data Modeling¶

In [32]:
# Establin g new realtionship between sources_df and trtplant_df ,Trtfact_df

WstWtrtrt_Sources_df = pd.merge(WstWtrtrt_df,Sources_df,on ='SourceID',how='left')
In [33]:
WstWtrtrt_Sources_df
Out[33]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector
0 1 1 1 2022-03-01 33 Pass Tuesday Residential -
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical
3 4 2 1 2022-03-01 16 Pass Tuesday Residential -
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing
... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential -
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical

1050 rows × 9 columns

In [34]:
# Establish new relationship between trtplant_df & wstwtrt_df

WstWtrtrt_TrtPlant_df= pd.merge(WstWtrtrt_Sources_df,Trtplant_df,on='PlantID',how='left')
In [35]:
WstWtrtrt_TrtPlant_df
Out[35]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector name Location Capacity Capacity_new
0 1 1 1 2022-03-01 33 Pass Tuesday Residential - San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
3 4 2 1 2022-03-01 16 Pass Tuesday Residential - Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential - New York Treatment Plant New York, NY 100 million gallons per day 100
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing New York Treatment Plant New York, NY 100 million gallons per day 100
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical New York Treatment Plant New York, NY 100 million gallons per day 100

1050 rows × 13 columns

Data Visualization¶

In [36]:
# Plant that is treating maximum and minimum volumne of waste water
WstWtrtrt_TrtPlant_df
Out[36]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector name Location Capacity Capacity_new
0 1 1 1 2022-03-01 33 Pass Tuesday Residential - San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
3 4 2 1 2022-03-01 16 Pass Tuesday Residential - Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential - New York Treatment Plant New York, NY 100 million gallons per day 100
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing New York Treatment Plant New York, NY 100 million gallons per day 100
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical New York Treatment Plant New York, NY 100 million gallons per day 100

1050 rows × 13 columns

In [37]:
WstWtrtrt_TrtPlant_df_grouped = WstWtrtrt_TrtPlant_df.groupby('name',as_index = False)['Volume of Water Treated'].sum()
In [38]:
WstWtrtrt_TrtPlant_df_grouped
Out[38]:
name Volume of Water Treated
0 Chicago Treatment Plant 4577
1 Denver Treatment Plant 1298
2 Los Angeles Treatment Plant 4983
3 Minneapolis Treatment Plant 677
4 New York Treatment Plant 3145
5 Oakland Treatment Plant 1518
6 Phoenix Treatment Plant 2377
7 Sacramento Treatment Plant 668
8 San Diego Treatment Plant 2810
9 San Francisco Treatment Plant 3133
In [39]:
# plot total volume of waste water treated by different treatment plants 

fig =px.bar(WstWtrtrt_TrtPlant_df_grouped.sort_values(by='Volume of Water Treated',ascending=False),template='plotly_dark',x='name', y='Volume of Water Treated',color= 'name',text='Volume of Water Treated')
fig.update_layout(xaxis_title='Treatment Plant Name',yaxis_title = 'Volume of Waste Water Treated(million galon)',title=dict(text='Total Volume of waste Water Treated by Different Treatment Plants',x=0.5),width=1000)
In [40]:
# KPI 2 : % Contribution of waste water from different Sources

WstWtrtrt_Sources_df
Out[40]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector
0 1 1 1 2022-03-01 33 Pass Tuesday Residential -
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical
3 4 2 1 2022-03-01 16 Pass Tuesday Residential -
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing
... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential -
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical

1050 rows × 9 columns

In [41]:
WstWtrtrt_Sources_grouped_df=WstWtrtrt_Sources_df.groupby('Source Name',as_index=False)['Volume of Water Treated'].sum()
In [42]:
WstWtrtrt_Sources_grouped_df
Out[42]:
Source Name Volume of Water Treated
0 Healthcare 8403
1 Industrial 8408
2 Residential 8375
In [43]:
fig=px.pie(WstWtrtrt_Sources_grouped_df,names='Source Name',values='Volume of Water Treated',template='plotly_dark',hole=0.5)
fig.update_layout(width=500,title=dict(text='% Contribution of waste water from different Sources',x=0.5))
fig.show()
In [44]:
# KPI 3 : Identify Highly utilized Treatment Plant
# Summation on Volume of Waste Water on the basis of Plant name and Date
WstWtrtrt_TrtPlant_grp_vol_df=WstWtrtrt_TrtPlant_df.groupby(['name','Date'],as_index=False)['Volume of Water Treated'].sum().sort_values(by=['name','Date'])
In [45]:
# Taking mean of capacity_new column on the basis of Plant name and Date
WstWtrtrt_TrtPlant_grp_cap_df=WstWtrtrt_TrtPlant_df.groupby(['name','Date'],as_index=False)['Capacity_new'].mean().sort_values(by=['name','Date'])
In [46]:
# Identifying Utilization of Treatment Plants on Daily basis 
WstWtrtrt_TrtPlant_grp_cap_df['Utilization']=WstWtrtrt_TrtPlant_grp_vol_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_grp_cap_df['Capacity_new'] * 100
In [47]:
# Rounding data to 2 places
WstWtrtrt_TrtPlant_grp_cap_df['Utilization']=WstWtrtrt_TrtPlant_grp_cap_df['Utilization'].round(2)
In [48]:
# Identifying average utilization of treatment plant
Avg_utilization_df=WstWtrtrt_TrtPlant_grp_cap_df.groupby('name',as_index=False)['Utilization'].mean().round(2)
In [49]:
# Plotting Utilization of Different Treatment Plants  
fig=px.bar(Avg_utilization_df.sort_values(by='Utilization'),x='Utilization',y='name',template='plotly_dark',color='Utilization',text='Utilization')
fig.update_layout(title=dict(text='Utilization of Treatment Plants',x=0.5),width=1000)
fig.show()
In [50]:
# KPI 4  Identifying Highly Efficient treatment plant

WstWtrtrt_TrtPlant_df
Out[50]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector name Location Capacity Capacity_new
0 1 1 1 2022-03-01 33 Pass Tuesday Residential - San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
3 4 2 1 2022-03-01 16 Pass Tuesday Residential - Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential - New York Treatment Plant New York, NY 100 million gallons per day 100
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing New York Treatment Plant New York, NY 100 million gallons per day 100
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical New York Treatment Plant New York, NY 100 million gallons per day 100

1050 rows × 13 columns

In [51]:
succesful_Treated_df=WstWtrtrt_TrtPlant_df[WstWtrtrt_TrtPlant_df['Result']=='Pass'].groupby('name',as_index=False)['Volume of Water Treated'].sum().sort_values(by='name')
succesful_Treated_df
Out[51]:
name Volume of Water Treated
0 Chicago Treatment Plant 4110
1 Denver Treatment Plant 1221
2 Los Angeles Treatment Plant 4366
3 Minneapolis Treatment Plant 585
4 New York Treatment Plant 2792
5 Oakland Treatment Plant 1238
6 Phoenix Treatment Plant 2129
7 Sacramento Treatment Plant 543
8 San Diego Treatment Plant 2530
9 San Francisco Treatment Plant 2596
In [52]:
WstWtrtrt_TrtPlant_df_grouped = WstWtrtrt_TrtPlant_df_grouped.sort_values(by='name')
WstWtrtrt_TrtPlant_df_grouped
Out[52]:
name Volume of Water Treated
0 Chicago Treatment Plant 4577
1 Denver Treatment Plant 1298
2 Los Angeles Treatment Plant 4983
3 Minneapolis Treatment Plant 677
4 New York Treatment Plant 3145
5 Oakland Treatment Plant 1518
6 Phoenix Treatment Plant 2377
7 Sacramento Treatment Plant 668
8 San Diego Treatment Plant 2810
9 San Francisco Treatment Plant 3133
In [53]:
succesful_Treated_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_df_grouped['Volume of Water Treated']
Out[53]:
0    0.897968
1    0.940678
2    0.876179
3    0.864106
4    0.887758
5    0.815547
6    0.895667
7    0.812874
8    0.900356
9    0.828599
Name: Volume of Water Treated, dtype: float64
In [54]:
succesful_Treated_df['Efficiency']=succesful_Treated_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_df_grouped['Volume of Water Treated']*100
In [55]:
succesful_Treated_df['Efficiency']=succesful_Treated_df['Efficiency'].round(2)
succesful_Treated_df
Out[55]:
name Volume of Water Treated Efficiency
0 Chicago Treatment Plant 4110 89.80
1 Denver Treatment Plant 1221 94.07
2 Los Angeles Treatment Plant 4366 87.62
3 Minneapolis Treatment Plant 585 86.41
4 New York Treatment Plant 2792 88.78
5 Oakland Treatment Plant 1238 81.55
6 Phoenix Treatment Plant 2129 89.57
7 Sacramento Treatment Plant 543 81.29
8 San Diego Treatment Plant 2530 90.04
9 San Francisco Treatment Plant 2596 82.86
In [56]:
# Ploting effieciency of treatment plant

fig = px.bar(succesful_Treated_df.sort_values(by='Efficiency'),x='Efficiency',y='name',template='plotly_dark',color='Efficiency',text='Efficiency')
fig.update_layout(width=1000,title=dict(text='Efficiency of treatment plant' ,x=0.5))
In [57]:
# KPI 5 : Identify Day Wise Activity of Treatment Plant
WstWtrtrt_TrtPlant_df
Out[57]:
SampleID PlantID SourceID Date Volume of Water Treated Result Day Name Source Name Industry Sector name Location Capacity Capacity_new
0 1 1 1 2022-03-01 33 Pass Tuesday Residential - San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
1 2 1 2 2022-03-01 27 Fail Tuesday Industrial Manufacturing San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
2 3 1 3 2022-03-01 28 Pass Tuesday Healthcare Medical San Francisco Treatment Plant San Francisco, CA 100 million gallons per day 100
3 4 2 1 2022-03-01 16 Pass Tuesday Residential - Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
4 5 2 2 2022-03-01 16 Pass Tuesday Industrial Manufacturing Oakland Treatment Plant Oakland, CA 50 million gallons per day 50
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1045 1046 9 2 2022-06-02 44 Pass Thursday Industrial Manufacturing Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1046 1047 9 3 2022-06-02 42 Pass Thursday Healthcare Medical Chicago Treatment Plant Chicago, IL 150 million gallons per day 150
1047 1048 10 1 2022-06-02 29 Pass Thursday Residential - New York Treatment Plant New York, NY 100 million gallons per day 100
1048 1049 10 2 2022-06-02 29 Pass Thursday Industrial Manufacturing New York Treatment Plant New York, NY 100 million gallons per day 100
1049 1050 10 3 2022-06-02 29 Fail Thursday Healthcare Medical New York Treatment Plant New York, NY 100 million gallons per day 100

1050 rows × 13 columns

In [58]:
Dayname_wise_activty = WstWtrtrt_TrtPlant_df.groupby(['name','Day Name'],as_index = False)['Volume of Water Treated'].sum()
In [59]:
Dayname_wise_activty
Out[59]:
name Day Name Volume of Water Treated
0 Chicago Treatment Plant Friday 660
1 Chicago Treatment Plant Monday 639
2 Chicago Treatment Plant Saturday 652
3 Chicago Treatment Plant Sunday 657
4 Chicago Treatment Plant Thursday 781
... ... ... ...
65 San Francisco Treatment Plant Saturday 437
66 San Francisco Treatment Plant Sunday 451
67 San Francisco Treatment Plant Thursday 534
68 San Francisco Treatment Plant Tuesday 361
69 San Francisco Treatment Plant Wednesday 458

70 rows × 3 columns

In [60]:
fig = px.line(Dayname_wise_activty,x='name',y='Volume of Water Treated',color = 'Day Name',template='plotly_dark')
fig.update_layout(width=900,title=dict(text='Day wise Activity of Treatment Plant',x = 0.5))